#!/usr/bin/python
#coding: utf-8

from MekongInfo import *
from Crawler import *
from DAO import *
from threading import Thread
from Queue import Queue
from Queue import Empty
class Process:
    '''Crawl, store and export news entries from the MekongInfo news listing.

    The actual fetching, parsing, persistence and export are delegated to
    helpers that are not defined in this file (Crawler, MekongInfo, DAO,
    ParserUtil); this class only wires them together.
    '''

    # Listing URL; a page is selected by appending ``?start=<offset>``.
    url = u'http://www.mekonginfo.org/news'
    # Offset step between two consecutive listing pages.
    pagesize = 20
    # Work queue of records awaiting text extraction.  It is defined on the
    # class, so it is shared by every Process instance.
    q = Queue()

    def test(self):
        '''Print ``test`` and return True.'''
        print(u'test')
        return True

    def record_news_onepage(self, url):
        '''Fetch one listing page and insert every entry found on it.

        url -- address of the listing page to download.

        The page source is obtained with Crawler.get_src_html, handed to
        MekongInfo.pre_process, and each item of MekongInfo.get_info_list()
        is passed to DAO.insert.
        '''
        cr = Crawler()
        src_html = cr.get_src_html(url)
        m = MekongInfo()
        m.pre_process(src_html)
        reslist = m.get_info_list()
        d = DAO(u'sqlite')
        for each in reslist:
            d.insert(each)
        # NOTE(review): presumably DAO releases its connection when the last
        # reference is dropped -- confirm against the DAO implementation.
        del d

    def record_news_pages(self, startpage, endpage):
        '''Record listing pages ``startpage`` through ``endpage``.

        Both bounds are 1-based and inclusive.  Nothing happens when
        ``endpage`` is smaller than ``startpage``.
        '''
        for page in range(startpage, endpage + 1):
            print(u'processing page:%d' % page)
            # Page N starts at offset (N - 1) * pagesize.
            url = self.url + u'?start=' + str((page - 1) * self.pagesize)
            self.record_news_onepage(url)

    def exportExcel(self, filename):
        '''Export the result of DAO.query_all() through ParserUtil.exportExcel.

        filename -- passed unchanged to ParserUtil.exportExcel.
        '''
        dao = DAO(u'sqlite')
        parser = ParserUtil()
        datalist = dao.query_all()
        parser.exportExcel(datalist, filename)

    def extract_text_multi_threads(self, threadsnum, start, end):
        '''Extract article text for pending records using worker threads.

        threadsnum -- number of worker threads to start; must be >= 1.
        start      -- 1-based position of the first pending record to process;
                      values below 1 are treated as 1.
        end        -- 1-based position of the last pending record (inclusive);
                      0 means "through the last record", and values past the
                      end of the list are clamped to it.

        The pending records are whatever MekongInfo.pack_info_from_query
        builds from DAO.query_not_download().  The call returns only after
        every worker thread has finished.

        Raises ValueError if ``threadsnum`` is smaller than 1.
        '''
        # Reject this before queueing anything: with no worker the records
        # would just sit in the shared queue.
        if threadsnum < 1:
            raise ValueError('threadsnum must be at least 1')

        # Add the selected records to the queue.
        dao = DAO(u'sqlite')
        m = MekongInfo()
        datalist = m.pack_info_from_query(dao.query_not_download())
        total = len(datalist)
        last = total if end == 0 else min(end, total)
        # Clamp so that start <= 0 cannot turn into a negative index.
        first = max(start - 1, 0)
        for i in range(first, last):
            self.q.put(datalist[i])

        # Start the workers, then wait for all of them (not just the last
        # one started) so no update is still in flight when we return.
        workers = []
        for _ in range(threadsnum):
            t = Thread(target=self.workingthread)
            t.daemon = True
            t.start()
            workers.append(t)
        for t in workers:
            t.join()
        del dao

    def workingthread(self):
        '''Worker loop: process queued records until the queue is empty.

        For each record the text is fetched with
        Crawler.get_content_by_Goose(record.href); when the result is not the
        empty string it is stored with DAO.update_text.
        '''
        while True:
            # A non-blocking get avoids the race where another worker takes
            # the last item between an empty() check and a blocking get().
            try:
                record = self.q.get_nowait()
            except Empty:
                return
            print(u"processing id:%d title:%s" % (record.id, record.title))
            href = record.href
            print(href)
            cr = Crawler()
            record.text = cr.get_content_by_Goose(href)
            if record.text != "":
                # Each update uses its own DAO instance, created in this
                # thread.
                dao = DAO(u'sqlite')
                dao.update_text(record)
                del dao

if __name__ == "__main__":
    # Script entry point: export the stored records (Process.exportExcel) to
    # the output file named as the first command-line argument.
    import sys

    # exportExcel requires a target filename; fail with a usage message
    # instead of a TypeError when none is given.
    if len(sys.argv) < 2:
        sys.exit('usage: %s OUTPUT_FILE' % sys.argv[0])
    p = Process()
    p.exportExcel(sys.argv[1])